I performed 100 resampling permutations for heritability estimation, shuffling the gene expression labels for the DGN-WB and GTEx cross-tissue and orthogonal tissue-specific expression from the linear mixed-model (see cross-tissue_results_2014-11-25.md). Plotted in gray is the sorted h2 from each permutation and in red is the sorted h2 from the observed data.
date <- Sys.Date()
library(dplyr)
library(ggplot2)
library(reshape2)
library(tidyr)
se <- function(x) sqrt(var(x,na.rm=T)/sum(!is.na(x)))
med <- function(x) median(x,na.rm=T)
lci<-function(x) quantile(x, c(.025, 0.975),na.rm=T)[1]
uci<-function(x) quantile(x, c(.025, 0.975),na.rm=T)[2]
sortf<-function(x) sort(x,na.last=FALSE) #sort and add NA's to top
"%&%" = function(a,b) paste(a,b,sep="")
mydir <- "/Users/heather/Dropbox/cross-tissue/"
tissues <- scan('tissue.list.prefix','character',sep='\t')
for(i in 1:length(tissues)){
tis <- tissues[i]
##local only
loconly <- read.table(mydir %&% tis %&% '_h2_localonly_allgenes_100perms_2014-12-19.txt',sep='\t')
data <- as.matrix(loconly[,5:105]) ##includes observed in first col
data<-apply(data,2,sortf) ##sort each column
data[is.na(data)] <- 0 ##replace NA's with 0
permatrix <- data[,2:ncol(data)] ##pull out the perms
permL <- melt(permatrix,id=x) ##convert to stacked data for ggplot
p1 <- ggplot(permL, aes(x=Var1,y=value,group=Var2))
p1 <- p1 + geom_line(color='gray')
obs <- as.matrix(data[,1]) ##pull out the observed data to plot thicker
obsL <- melt(obs,id=x) ##convert to stacked data for ggplot
losub <- p1 + geom_point(data=obsL,aes(x=Var1,y=value),color='red') + xlab("Genes sorted by h2") + ylab("h2") + ylim(0,1) + ggtitle(tis %&% ' Local (Only) Heritability')
print(losub)
##global only
gloonly <- read.table(tis %&% '_h2_globalonly_allgenes_100perms_2014-12-19.txt',sep='\t')
data <- as.matrix(gloonly[,5:105]) ##includes observed in first col
data<-apply(data,2,sortf) ##sort each column
data[is.na(data)] <- 0 ##replace NA's with 0
permatrix <- data[,2:ncol(data)] ##pull out the perms
permL <- melt(permatrix,id=x) ##convert to stacked data for ggplot
p1 <- ggplot(permL, aes(x=Var1,y=value,group=Var2))
p1 <- p1 + geom_line(color='gray')
obs <- as.matrix(data[,1]) ##pull out the observed data to plot thicker
obsL <- melt(obs,id=x) ##convert to stacked data for ggplot
gosub <- p1 + geom_point(data=obsL,aes(x=Var1,y=value),color='red') + xlab("Genes sorted by h2") + ylab("h2") + ylim(0,1) + ggtitle(tis %&% ' Global (Only) Heritability')
print(gosub)
##local joint
locjoint <- read.table(tis %&% '_h2_localjoint_allgenes_100perms_2014-12-19.txt',sep='\t')
data <- as.matrix(locjoint[,5:105]) ##includes observed in first col
data<-apply(data,2,sortf) ##sort each column
data[is.na(data)] <- 0 ##replace NA's with 0
permatrix <- data[,2:ncol(data)] ##pull out the perms
permL <- melt(permatrix,id=x) ##convert to stacked data for ggplot
p1 <- ggplot(permL, aes(x=Var1,y=value,group=Var2))
p1 <- p1 + geom_line(color='gray')
obs <- as.matrix(data[,1]) ##pull out the observed data to plot thicker
obsL <- melt(obs,id=x) ##convert to stacked data for ggplot
ljsub <- p1 + geom_point(data=obsL,aes(x=Var1,y=value),color='red') + xlab("Genes sorted by h2") + ylab("h2") + ylim(0,1) + ggtitle(tis %&% ' Local (Joint) Heritability')
print(ljsub)
##global joint
glojoint <- read.table(tis %&% '_h2_globaljoint_allgenes_100perms_2014-12-19.txt',sep='\t')
data <- as.matrix(glojoint[,5:105]) ##includes observed in first col
data<-apply(data,2,sortf) ##sort each column
data[is.na(data)] <- 0 ##replace NA's with 0
permatrix <- data[,2:ncol(data)] ##pull out the perms
permL <- melt(permatrix,id=x) ##convert to stacked data for ggplot
p1 <- ggplot(permL, aes(x=Var1,y=value,group=Var2))
p1 <- p1 + geom_line(color='gray')
obs <- as.matrix(data[,1]) ##pull out the observed data to plot thicker
obsL <- melt(obs,id=x) ##convert to stacked data for ggplot
gjsub <- p1 + geom_point(data=obsL,aes(x=Var1,y=value),color='red') + xlab("Genes sorted by h2") + ylab("h2") + ylim(0,1) + ggtitle(tis %&% ' Global (Joint) Heritability')
print(gjsub)
}
Histograms of the 95% CIs for the local h2 estimation in the marginal model (local GRM) and the joint model (local GRM + global GRM) are plotted.
for(i in 1:length(tissues)){
tis <- tissues[i]
##marginal model (local GRM model)
loconly <- read.table(mydir %&% tis %&% '_h2_localonly_allgenes_100perms_2014-12-19.txt',sep='\t')
perms <- loconly[,6:105]
minperms <- apply(perms,1,lci)
maxperms <- apply(perms,1,uci)
marginalCI <- maxperms-minperms
marginalCI[is.na(marginalCI)] <- 0 ##replace NA's with 0
marginal <- data.frame(marginalCI,"Marginal")
colnames(marginal) <- c("range","model")
##joint model (local + global GRM model)
locjoint <- read.table(tis %&% '_h2_localjoint_allgenes_100perms_2014-12-19.txt',sep='\t')
perms <- locjoint[,6:105]
minperms <- apply(perms,1,lci)
maxperms <- apply(perms,1,uci)
jointCI <- maxperms-minperms
jointCI[is.na(jointCI)] <- 0 ##replace NA's with 0
joint <- data.frame(jointCI,"Joint")
colnames(joint) <- c("range","model")
all <- rbind(marginal,joint)
p1 <- ggplot(all, aes(x = range, fill = model)) + geom_histogram() + xlab("95% CI (upper - lower)") + ggtitle(tis %&% ' Local GRM h2 estimate 95% CIs')+ facet_grid(model~.)
print(p1)
res <- t.test(marginalCI,jointCI) ## is diff b/t joint and marginal CIs significant?
print(res)
}
##
## Welch Two Sample t-test
##
## data: marginalCI and jointCI
## t = -0.8945, df = 26335.84, p-value = 0.3711
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.412138e-04 9.003856e-05
## sample estimates:
## mean of x mean of y
## 0.02562141 0.02569699
##
## Welch Two Sample t-test
##
## data: marginalCI and jointCI
## t = 1.0935, df = 36870.94, p-value = 0.2742
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.0001553194 0.0005472863
## sample estimates:
## mean of x mean of y
## 0.05661511 0.05641913
##
## Welch Two Sample t-test
##
## data: marginalCI and jointCI
## t = 19.249, df = 24423.07, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.01065313 0.01306863
## sample estimates:
## mean of x mean of y
## 0.09561075 0.08374987
##
## Welch Two Sample t-test
##
## data: marginalCI and jointCI
## t = 20.8775, df = 25373.06, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.01261666 0.01523111
## sample estimates:
## mean of x mean of y
## 0.10162855 0.08770467
##
## Welch Two Sample t-test
##
## data: marginalCI and jointCI
## t = 19.4178, df = 26059.15, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.01101761 0.01349160
## sample estimates:
## mean of x mean of y
## 0.10178813 0.08953353
##
## Welch Two Sample t-test
##
## data: marginalCI and jointCI
## t = 20.3456, df = 24508.09, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.01053132 0.01277677
## sample estimates:
## mean of x mean of y
## 0.08217013 0.07051609
##
## Welch Two Sample t-test
##
## data: marginalCI and jointCI
## t = 17.9702, df = 25571.17, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.01028001 0.01279708
## sample estimates:
## mean of x mean of y
## 0.09449416 0.08295562
##
## Welch Two Sample t-test
##
## data: marginalCI and jointCI
## t = 18.7361, df = 26580.07, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.01048682 0.01293731
## sample estimates:
## mean of x mean of y
## 0.10118889 0.08947682
##
## Welch Two Sample t-test
##
## data: marginalCI and jointCI
## t = 19.2133, df = 27632.08, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.006258716 0.007680758
## sample estimates:
## mean of x mean of y
## 0.08061716 0.07364742
Plotted in black is the median h2 of the 100 permutations, in gray is the 95% CI, and in red is the h2 from the observed data for the corresponding gene.
for(i in 1:length(tissues)){
tis <- tissues[i]
##local only
loconly <- read.table(mydir %&% tis %&% '_h2_localonly_allgenes_100perms_2014-12-19.txt',sep='\t')
obs <- loconly[,5]
perms <- loconly[,6:105]
medperms <- apply(perms,1,med)
seperms <- apply(perms,1,se)
minperms <- apply(perms,1,lci)
maxperms <- apply(perms,1,uci)
data <- data.frame(obs,medperms,seperms,minperms,maxperms)
data <- data %>% arrange(obs)
data <- data[complete.cases(data),]
p1 <- ggplot(data,aes(x=1:nrow(data),y=medperms,ymin=minperms, ymax=maxperms) ) + geom_pointrange(col='gray')+geom_point(col='black')+ggtitle(tis %&% ' Local (Only) Heritability')
losub <- p1 + geom_point(data=data,aes(x=1:nrow(data),y=obs),color='red') + xlab("Genes sorted by observed h2") + ylab("h2") + ylim(0,1)
print(losub)
##global only
gloonly <- read.table(tis %&% '_h2_globalonly_allgenes_100perms_2014-12-19.txt',sep='\t')
obs <- gloonly[,5]
perms <- gloonly[,6:105]
medperms <- apply(perms,1,med)
seperms <- apply(perms,1,se)
minperms <- apply(perms,1,lci)
maxperms <- apply(perms,1,uci)
data <- data.frame(obs,medperms,seperms,minperms,maxperms)
data <- data %>% arrange(obs)
data <- data[complete.cases(data),]
p1 <- ggplot(data,aes(x=1:nrow(data),y=medperms,ymin=minperms, ymax=maxperms) ) + geom_pointrange(col='gray')+geom_point(col='black')+ggtitle(tis %&% ' Global (Only) Heritability')
gosub <- p1 + geom_point(data=data,aes(x=1:nrow(data),y=obs),color='red') + xlab("Genes sorted by observed h2") + ylab("h2") + ylim(0,1)
print(gosub)
##local joint
locjoint <- read.table(tis %&% '_h2_localjoint_allgenes_100perms_2014-12-19.txt',sep='\t')
obs <- locjoint[,5]
perms <- locjoint[,6:105]
medperms <- apply(perms,1,med)
seperms <- apply(perms,1,se)
minperms <- apply(perms,1,lci)
maxperms <- apply(perms,1,uci)
data <- data.frame(obs,medperms,seperms,minperms,maxperms)
data <- data %>% arrange(obs)
data <- data[complete.cases(data),]
p1 <- ggplot(data,aes(x=1:nrow(data),y=medperms,ymin=minperms, ymax=maxperms) ) + geom_pointrange(col='gray')+geom_point(col='black')+ggtitle(tis %&% ' Local (Joint) Heritability')
ljsub <- p1 + geom_point(data=data,aes(x=1:nrow(data),y=obs),color='red') + xlab("Genes sorted by observed h2") + ylab("h2") + ylim(0,1)
print(ljsub)
##global joint
glojoint <- read.table(tis %&% '_h2_globaljoint_allgenes_100perms_2014-12-19.txt',sep='\t')
obs <- glojoint[,5]
perms <- glojoint[,6:105]
medperms <- apply(perms,1,med)
seperms <- apply(perms,1,se)
minperms <- apply(perms,1,lci)
maxperms <- apply(perms,1,uci)
data <- data.frame(obs,medperms,seperms,minperms,maxperms)
data <- data %>% arrange(obs)
data <- data[complete.cases(data),]
p1 <- ggplot(data,aes(x=1:nrow(data),y=medperms,ymin=minperms, ymax=maxperms) ) + geom_pointrange(col='gray')+geom_point(col='black')+ggtitle(tis %&% ' Global (Joint) Heritability')
gjsub <- p1 + geom_point(data=data,aes(x=1:nrow(data),y=obs),color='red') + xlab("Genes sorted by observed h2") + ylab("h2") + ylim(0,1)
print(gjsub)
}
for(i in 1:length(tissues)){
tis <- tissues[i]
##local only
loconly <- read.table(mydir %&% tis %&% '_h2_localonly_allgenes_100perms_2014-12-19.txt',sep='\t')
obs <- loconly[,5]
perms <- loconly[,6:105]
medperms <- apply(perms,1,med)
seperms <- apply(perms,1,se)
minperms <- apply(perms,1,lci)
maxperms <- apply(perms,1,uci)
data <- data.frame(obs,medperms,seperms,minperms,maxperms)
data <- data %>% arrange(obs)
data <- data[complete.cases(data),]
gt05 <- data[data[,1]>0.05,] ##only genes with obs h2 > 0.05
p1 <- ggplot(gt05,aes(x=1:nrow(gt05),y=medperms,ymin=minperms, ymax=maxperms) ) + geom_pointrange(col='gray')+geom_point(col='black')+ggtitle(tis %&% ' Local (Only) Heritability')
losub <- p1 + geom_point(data=gt05,aes(x=1:nrow(gt05),y=obs),color='red') + xlab("Genes sorted by observed h2 > 0.05") + ylab("h2") + ylim(0,1)
print(losub)
##global only
gloonly <- read.table(tis %&% '_h2_globalonly_allgenes_100perms_2014-12-19.txt',sep='\t')
obs <- gloonly[,5]
perms <- gloonly[,6:105]
medperms <- apply(perms,1,med)
seperms <- apply(perms,1,se)
minperms <- apply(perms,1,lci)
maxperms <- apply(perms,1,uci)
data <- data.frame(obs,medperms,seperms,minperms,maxperms)
data <- data %>% arrange(obs)
data <- data[complete.cases(data),]
gt05 <- data[data[,1]>0.05,] ##only genes with obs h2 > 0.05
p1 <- ggplot(gt05,aes(x=1:nrow(gt05),y=medperms,ymin=minperms, ymax=maxperms) ) + geom_pointrange(col='gray')+geom_point(col='black')+ggtitle(tis %&% ' Global (Only) Heritability')
gosub <- p1 + geom_point(data=gt05,aes(x=1:nrow(gt05),y=obs),color='red') + xlab("Genes sorted by observed h2 > 0.05") + ylab("h2") + ylim(0,1)
print(gosub)
##local joint
locjoint <- read.table(tis %&% '_h2_localjoint_allgenes_100perms_2014-12-19.txt',sep='\t')
obs <- locjoint[,5]
perms <- locjoint[,6:105]
medperms <- apply(perms,1,med)
seperms <- apply(perms,1,se)
minperms <- apply(perms,1,lci)
maxperms <- apply(perms,1,uci)
data <- data.frame(obs,medperms,seperms,minperms,maxperms)
data <- data %>% arrange(obs)
data <- data[complete.cases(data),]
gt05 <- data[data[,1]>0.05,] ##only genes with obs h2 > 0.05
p1 <- ggplot(gt05,aes(x=1:nrow(gt05),y=medperms,ymin=minperms, ymax=maxperms) ) + geom_pointrange(col='gray')+geom_point(col='black')+ggtitle(tis %&% ' Local (Joint) Heritability')
ljsub <- p1 + geom_point(data=gt05,aes(x=1:nrow(gt05),y=obs),color='red') + xlab("Genes sorted by observed h2 > 0.05") + ylab("h2") + ylim(0,1)
print(ljsub)
##global joint
glojoint <- read.table(tis %&% '_h2_globaljoint_allgenes_100perms_2014-12-19.txt',sep='\t')
obs <- glojoint[,5]
perms <- glojoint[,6:105]
medperms <- apply(perms,1,med)
seperms <- apply(perms,1,se)
minperms <- apply(perms,1,lci)
maxperms <- apply(perms,1,uci)
data <- data.frame(obs,medperms,seperms,minperms,maxperms)
data <- data %>% arrange(obs)
data <- data[complete.cases(data),]
gt05 <- data[data[,1]>0.05,] ##only genes with obs h2 > 0.05
p1 <- ggplot(gt05,aes(x=1:nrow(gt05),y=medperms,ymin=minperms, ymax=maxperms) ) + geom_pointrange(col='gray')+geom_point(col='black')+ggtitle(tis %&% ' Global (Joint) Heritability')
gjsub <- p1 + geom_point(data=gt05,aes(x=1:nrow(gt05),y=obs),color='red') + xlab("Genes sorted by observed h2 > 0.05") + ylab("h2") + ylim(0,1)
print(gjsub)
}